<!DOCTYPE html>
<html lang="en-US">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width,initial-scale=1">
    <title>什么是 BloomFilter | JavaKeeper</title>
    <meta name="generator" content="VuePress 1.5.4">
    <link rel="icon" href="/icon.svg">
    <script>
        var _hmt = _hmt || [];
        (function() {
            var hm = document.createElement("script");
            hm.src = "https://hm.baidu.com/hm.js?a949a9b30eb86ac0159e735ff8670c03";
            var s = document.getElementsByTagName("script")[0];
            s.parentNode.insertBefore(hm, s);
            // 引入谷歌,不需要可删除这段
            var hm1 = document.createElement("script");
            hm1.src = "https://www.googletagmanager.com/gtag/js?id=UA-169923503-1";
            var s1 = document.getElementsByTagName("script")[0]; 
            s1.parentNode.insertBefore(hm1, s1);
        })();
        // 谷歌加载,不需要可删除
        window.dataLayer = window.dataLayer || [];
        function gtag(){dataLayer.push(arguments);}
        gtag('js', new Date());
        gtag('config', 'UA-169923503-1');
    </script>
    <meta name="description" content="">
    <meta name="keywords" content="JavaKeeper,Java,Java开发,算法,blog">
    <link rel="preload" href="/assets/css/0.styles.91f57736.css" as="style"><link rel="preload" href="/assets/js/app.447d4224.js" as="script"><link rel="preload" href="/assets/js/3.9d76740c.js" as="script"><link rel="preload" href="/assets/js/1.c4fd7d2e.js" as="script"><link rel="preload" href="/assets/js/33.0c53373c.js" as="script"><link rel="prefetch" href="/assets/js/10.8cf3be2c.js"><link rel="prefetch" href="/assets/js/100.74f35ab8.js"><link rel="prefetch" href="/assets/js/101.7a062346.js"><link rel="prefetch" href="/assets/js/102.c9485235.js"><link rel="prefetch" href="/assets/js/103.d88a3805.js"><link rel="prefetch" href="/assets/js/104.6e034144.js"><link rel="prefetch" href="/assets/js/105.d22f7450.js"><link rel="prefetch" href="/assets/js/106.a6cb54b0.js"><link rel="prefetch" href="/assets/js/107.7b65e72b.js"><link rel="prefetch" href="/assets/js/108.eb5804bb.js"><link rel="prefetch" href="/assets/js/109.05f775e5.js"><link rel="prefetch" href="/assets/js/11.c54ae13c.js"><link rel="prefetch" href="/assets/js/110.51d3d641.js"><link rel="prefetch" href="/assets/js/111.022b64a7.js"><link rel="prefetch" href="/assets/js/112.da8afd52.js"><link rel="prefetch" href="/assets/js/113.05a17b18.js"><link rel="prefetch" href="/assets/js/114.8960d913.js"><link rel="prefetch" href="/assets/js/115.67919f68.js"><link rel="prefetch" href="/assets/js/116.62b0cd71.js"><link rel="prefetch" href="/assets/js/117.ebac3eff.js"><link rel="prefetch" href="/assets/js/118.ecd629bd.js"><link rel="prefetch" href="/assets/js/119.a09a0897.js"><link rel="prefetch" href="/assets/js/12.60aa3b24.js"><link rel="prefetch" href="/assets/js/120.bf639d3d.js"><link rel="prefetch" href="/assets/js/121.b89d0c8e.js"><link rel="prefetch" href="/assets/js/122.1a75ff83.js"><link rel="prefetch" href="/assets/js/123.d2127132.js"><link rel="prefetch" href="/assets/js/124.2caff9e0.js"><link rel="prefetch" href="/assets/js/125.9b9f966a.js"><link rel="prefetch" href="/assets/js/126.58cdfb3d.js"><link rel="prefetch" href="/assets/js/127.8ef09c53.js"><link rel="prefetch" href="/assets/js/128.efdc2ae4.js"><link rel="prefetch" href="/assets/js/129.e35cbc57.js"><link rel="prefetch" href="/assets/js/13.125c13a0.js"><link rel="prefetch" href="/assets/js/130.f01a55e3.js"><link rel="prefetch" href="/assets/js/131.65205f4a.js"><link rel="prefetch" href="/assets/js/132.f42c5a0a.js"><link rel="prefetch" href="/assets/js/133.9ba468b3.js"><link rel="prefetch" href="/assets/js/134.7b597ba9.js"><link rel="prefetch" href="/assets/js/135.fb828b9a.js"><link rel="prefetch" href="/assets/js/136.3887532f.js"><link rel="prefetch" href="/assets/js/137.549bae01.js"><link rel="prefetch" href="/assets/js/138.db8d423d.js"><link rel="prefetch" href="/assets/js/139.dbaf2267.js"><link rel="prefetch" href="/assets/js/14.bd1d0b0d.js"><link rel="prefetch" href="/assets/js/140.6cb65fdc.js"><link rel="prefetch" href="/assets/js/141.9bd6cc4b.js"><link rel="prefetch" href="/assets/js/142.552db5ed.js"><link rel="prefetch" href="/assets/js/143.2c9f2bf4.js"><link rel="prefetch" href="/assets/js/144.fba98a15.js"><link rel="prefetch" href="/assets/js/145.c42f3a21.js"><link rel="prefetch" href="/assets/js/146.596d4d33.js"><link rel="prefetch" href="/assets/js/147.c48ae5c1.js"><link rel="prefetch" href="/assets/js/148.71064871.js"><link rel="prefetch" href="/assets/js/149.16582d21.js"><link rel="prefetch" href="/assets/js/15.f247873b.js"><link rel="prefetch" href="/assets/js/150.ead09aca.js"><link rel="prefetch" href="/assets/js/151.971fdf4b.js"><link rel="prefetch" href="/assets/js/152.369c9362.js"><link rel="prefetch" href="/assets/js/153.371edd15.js"><link rel="prefetch" href="/assets/js/154.e090b491.js"><link rel="prefetch" href="/assets/js/155.c68bf602.js"><link rel="prefetch" href="/assets/js/156.304aea8d.js"><link rel="prefetch" href="/assets/js/157.83beef7f.js"><link rel="prefetch" href="/assets/js/158.bb1794b0.js"><link rel="prefetch" href="/assets/js/159.2d54e792.js"><link rel="prefetch" href="/assets/js/16.04336c71.js"><link rel="prefetch" href="/assets/js/160.99d56586.js"><link rel="prefetch" href="/assets/js/161.edf660aa.js"><link rel="prefetch" href="/assets/js/162.0b84606e.js"><link rel="prefetch" href="/assets/js/163.b59e0d60.js"><link rel="prefetch" href="/assets/js/164.d9eb8228.js"><link rel="prefetch" href="/assets/js/165.ca624c79.js"><link rel="prefetch" href="/assets/js/166.025b2ba1.js"><link rel="prefetch" href="/assets/js/167.abc982cc.js"><link rel="prefetch" href="/assets/js/168.27ca13dc.js"><link rel="prefetch" href="/assets/js/169.41e753a2.js"><link rel="prefetch" href="/assets/js/17.43b3c1c8.js"><link rel="prefetch" href="/assets/js/170.626319e1.js"><link rel="prefetch" href="/assets/js/171.a221dddf.js"><link rel="prefetch" href="/assets/js/172.464b2361.js"><link rel="prefetch" href="/assets/js/173.96a3afee.js"><link rel="prefetch" href="/assets/js/174.116607c2.js"><link rel="prefetch" href="/assets/js/175.ea3e8659.js"><link rel="prefetch" href="/assets/js/176.7d7b8afc.js"><link rel="prefetch" href="/assets/js/177.a6e00aa0.js"><link rel="prefetch" href="/assets/js/178.1f93afaf.js"><link rel="prefetch" href="/assets/js/179.3aa00dcd.js"><link rel="prefetch" href="/assets/js/18.d81b44d5.js"><link rel="prefetch" href="/assets/js/180.f8b2b75a.js"><link rel="prefetch" href="/assets/js/181.8e11258a.js"><link rel="prefetch" href="/assets/js/182.22243941.js"><link rel="prefetch" href="/assets/js/183.d051fdf6.js"><link rel="prefetch" href="/assets/js/184.a994075e.js"><link rel="prefetch" href="/assets/js/185.776c7e16.js"><link rel="prefetch" href="/assets/js/186.f1887955.js"><link rel="prefetch" href="/assets/js/187.da0d3626.js"><link rel="prefetch" href="/assets/js/188.8dfc358f.js"><link rel="prefetch" href="/assets/js/189.dcac5a59.js"><link rel="prefetch" href="/assets/js/19.1b3d66e1.js"><link rel="prefetch" href="/assets/js/190.c7e413d0.js"><link rel="prefetch" href="/assets/js/191.d9806121.js"><link rel="prefetch" href="/assets/js/192.869791da.js"><link rel="prefetch" href="/assets/js/193.2d74c4c8.js"><link rel="prefetch" href="/assets/js/194.c73a1909.js"><link rel="prefetch" href="/assets/js/195.e8c74834.js"><link rel="prefetch" href="/assets/js/20.bd5949ec.js"><link rel="prefetch" href="/assets/js/21.3fcf98cf.js"><link rel="prefetch" href="/assets/js/22.2fa1e2e8.js"><link rel="prefetch" href="/assets/js/23.1ae64bb4.js"><link rel="prefetch" href="/assets/js/24.7bdf7387.js"><link rel="prefetch" href="/assets/js/25.392c436e.js"><link rel="prefetch" href="/assets/js/26.58acbd4b.js"><link rel="prefetch" href="/assets/js/27.c725bdd5.js"><link rel="prefetch" href="/assets/js/28.6c9bda1e.js"><link rel="prefetch" href="/assets/js/29.e656b537.js"><link rel="prefetch" href="/assets/js/30.2c326fc7.js"><link rel="prefetch" href="/assets/js/31.e6c9fa30.js"><link rel="prefetch" href="/assets/js/32.c9c88437.js"><link rel="prefetch" href="/assets/js/34.9821e543.js"><link rel="prefetch" href="/assets/js/35.de8253eb.js"><link rel="prefetch" href="/assets/js/36.d182f929.js"><link rel="prefetch" href="/assets/js/37.9fa79014.js"><link rel="prefetch" href="/assets/js/38.9bebff76.js"><link rel="prefetch" href="/assets/js/39.19a3a2d4.js"><link rel="prefetch" href="/assets/js/4.564edb9d.js"><link rel="prefetch" href="/assets/js/40.cca6955f.js"><link rel="prefetch" href="/assets/js/41.854cd09a.js"><link rel="prefetch" href="/assets/js/42.ca7b612f.js"><link rel="prefetch" href="/assets/js/43.87027d58.js"><link rel="prefetch" href="/assets/js/44.8c2b4f4b.js"><link rel="prefetch" href="/assets/js/45.dffb4e08.js"><link rel="prefetch" href="/assets/js/46.f58049a5.js"><link rel="prefetch" href="/assets/js/47.6854070c.js"><link rel="prefetch" href="/assets/js/48.6cd9fa3d.js"><link rel="prefetch" href="/assets/js/49.e8861afa.js"><link rel="prefetch" href="/assets/js/5.5c31d62f.js"><link rel="prefetch" href="/assets/js/50.703bffab.js"><link rel="prefetch" href="/assets/js/51.6655c373.js"><link rel="prefetch" href="/assets/js/52.deb2eb09.js"><link rel="prefetch" href="/assets/js/53.6e0ed77d.js"><link rel="prefetch" href="/assets/js/54.b05c58ad.js"><link rel="prefetch" href="/assets/js/55.49c8164e.js"><link rel="prefetch" href="/assets/js/56.a5574e6b.js"><link rel="prefetch" href="/assets/js/57.58cb0de4.js"><link rel="prefetch" href="/assets/js/58.52345112.js"><link rel="prefetch" href="/assets/js/59.663ce78d.js"><link rel="prefetch" href="/assets/js/6.a9df34ee.js"><link rel="prefetch" href="/assets/js/60.f06adde2.js"><link rel="prefetch" href="/assets/js/61.170255a1.js"><link rel="prefetch" href="/assets/js/62.9d120050.js"><link rel="prefetch" href="/assets/js/63.70cced6b.js"><link rel="prefetch" href="/assets/js/64.577f3548.js"><link rel="prefetch" href="/assets/js/65.c037b29d.js"><link rel="prefetch" href="/assets/js/66.7dd1045f.js"><link rel="prefetch" href="/assets/js/67.d3aa4d6c.js"><link rel="prefetch" href="/assets/js/68.526dbb61.js"><link rel="prefetch" href="/assets/js/69.58269266.js"><link rel="prefetch" href="/assets/js/7.6609d4d6.js"><link rel="prefetch" href="/assets/js/70.64108f1b.js"><link rel="prefetch" href="/assets/js/71.1e95e0a6.js"><link rel="prefetch" href="/assets/js/72.42e7ec94.js"><link rel="prefetch" href="/assets/js/73.dad4e1c5.js"><link rel="prefetch" href="/assets/js/74.28ea286a.js"><link rel="prefetch" href="/assets/js/75.dd6d4c6f.js"><link rel="prefetch" href="/assets/js/76.ca6539df.js"><link rel="prefetch" href="/assets/js/77.feb13b0e.js"><link rel="prefetch" href="/assets/js/78.321e90e6.js"><link rel="prefetch" href="/assets/js/79.68eb8fcf.js"><link rel="prefetch" href="/assets/js/8.396d51fd.js"><link rel="prefetch" href="/assets/js/80.4edb5321.js"><link rel="prefetch" href="/assets/js/81.735d7e57.js"><link rel="prefetch" href="/assets/js/82.fa120bdf.js"><link rel="prefetch" href="/assets/js/83.bf755f94.js"><link rel="prefetch" href="/assets/js/84.9b32070c.js"><link rel="prefetch" href="/assets/js/85.592aca7c.js"><link rel="prefetch" href="/assets/js/86.4dcd9e73.js"><link rel="prefetch" href="/assets/js/87.a9e546aa.js"><link rel="prefetch" href="/assets/js/88.2a423212.js"><link rel="prefetch" href="/assets/js/89.5f455115.js"><link rel="prefetch" href="/assets/js/9.adb074c6.js"><link rel="prefetch" href="/assets/js/90.5202da0a.js"><link rel="prefetch" href="/assets/js/91.02cee99d.js"><link rel="prefetch" href="/assets/js/92.f16bad0b.js"><link rel="prefetch" href="/assets/js/93.f933634f.js"><link rel="prefetch" href="/assets/js/94.8e7b1d65.js"><link rel="prefetch" href="/assets/js/95.ee0e4a0a.js"><link rel="prefetch" href="/assets/js/96.e21d78c2.js"><link rel="prefetch" href="/assets/js/97.c87e514e.js"><link rel="prefetch" href="/assets/js/98.d123ac92.js"><link rel="prefetch" href="/assets/js/99.92d1b416.js">
    <link rel="stylesheet" href="/assets/css/0.styles.91f57736.css">
  </head>
  <body>
    <div id="app" data-server-rendered="true"><div class="theme-container no-sidebar" data-v-3ba18f14><div data-v-3ba18f14><div id="loader-wrapper" class="loading-wrapper" data-v-041fef5b data-v-3ba18f14 data-v-3ba18f14><div class="loader-main" data-v-041fef5b><div data-v-041fef5b></div><div data-v-041fef5b></div><div data-v-041fef5b></div><div data-v-041fef5b></div></div> <!----> <!----></div> <div class="password-shadow password-wrapper-out" style="display:none;" data-v-68139a52 data-v-3ba18f14 data-v-3ba18f14><h3 class="title" style="display:none;" data-v-68139a52 data-v-68139a52>JavaKeeper</h3> <!----> <label id="box" class="inputBox" style="display:none;" data-v-68139a52 data-v-68139a52><input type="password" value="" data-v-68139a52> <span data-v-68139a52>Konck! Knock!</span> <button data-v-68139a52>OK</button></label> <div class="footer" style="display:none;" data-v-68139a52 data-v-68139a52><span data-v-68139a52><i class="iconfont reco-theme" data-v-68139a52></i> <a target="blank" href="https://vuepress-theme-reco.recoluan.com" data-v-68139a52>vuePress-theme-reco</a></span> <span data-v-68139a52><i class="iconfont reco-copyright" data-v-68139a52></i> <a data-v-68139a52><span data-v-68139a52>海星</span>
            
          <!---->
          2020
        </a></span></div></div> <div class="hide" data-v-3ba18f14><header class="navbar" data-v-3ba18f14><div class="sidebar-button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" role="img" viewBox="0 0 448 512" class="icon"><path fill="currentColor" d="M436 124H12c-6.627 0-12-5.373-12-12V80c0-6.627 5.373-12 12-12h424c6.627 0 12 5.373 12 12v32c0 6.627-5.373 12-12 12zm0 160H12c-6.627 0-12-5.373-12-12v-32c0-6.627 5.373-12 12-12h424c6.627 0 12 5.373 12 12v32c0 6.627-5.373 12-12 12zm0 160H12c-6.627 0-12-5.373-12-12v-32c0-6.627 5.373-12 12-12h424c6.627 0 12 5.373 12 12v32c0 6.627-5.373 12-12 12z"></path></svg></div> <a href="/" class="home-link router-link-active"><!----> <span class="site-name">JavaKeeper</span></a> <div class="links"><div class="color-picker"><a class="color-button"><i class="iconfont reco-color"></i></a> <div class="color-picker-menu" style="display:none;"><div class="mode-options"><h4 class="title">Choose mode</h4> <ul class="color-mode-options"><li class="dark">dark</li><li class="auto active">auto</li><li class="light">light</li></ul></div></div></div> <div class="search-box"><i class="iconfont reco-search"></i> <input aria-label="Search" autocomplete="off" spellcheck="false" value=""> <!----></div> <nav class="nav-links can-hide"><div class="nav-item"><a href="/java/" class="nav-link"><i class="iconfont undefined"></i>
  Java
</a></div><div class="nav-item"><a href="/data-structure-algorithms/" class="nav-link"><i class="iconfont undefined"></i>
  数据结构与算法
</a></div><div class="nav-item"><a href="/data-store/" class="nav-link"><i class="iconfont undefined"></i>
  数据存储与缓存
</a></div><div class="nav-item"><a href="/interview/" class="nav-link"><i class="iconfont undefined"></i>
  直击面试
</a></div> <a href="https://github.com/Jstarfish/JavaKeeper" target="_blank" rel="noopener noreferrer" class="repo-link"><i class="iconfont reco-github"></i>
    GitHub
    <svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a></nav></div></header> <div class="sidebar-mask" data-v-3ba18f14></div> <aside class="sidebar" data-v-3ba18f14><div class="personal-info-wrapper" data-v-5f6acefd data-v-3ba18f14><!----> <h3 class="name" data-v-5f6acefd>
    海星
  </h3> <div class="num" data-v-5f6acefd><div data-v-5f6acefd><h3 data-v-5f6acefd>0</h3> <h6 data-v-5f6acefd>Article</h6></div> <div data-v-5f6acefd><h3 data-v-5f6acefd>0</h3> <h6 data-v-5f6acefd>Tag</h6></div></div> <hr data-v-5f6acefd></div> <nav class="nav-links"><div class="nav-item"><a href="/java/" class="nav-link"><i class="iconfont undefined"></i>
  Java
</a></div><div class="nav-item"><a href="/data-structure-algorithms/" class="nav-link"><i class="iconfont undefined"></i>
  数据结构与算法
</a></div><div class="nav-item"><a href="/data-store/" class="nav-link"><i class="iconfont undefined"></i>
  数据存储与缓存
</a></div><div class="nav-item"><a href="/interview/" class="nav-link"><i class="iconfont undefined"></i>
  直击面试
</a></div> <a href="https://github.com/Jstarfish/JavaKeeper" target="_blank" rel="noopener noreferrer" class="repo-link"><i class="iconfont reco-github"></i>
    GitHub
    <svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a></nav> <!----> </aside> <div class="password-shadow password-wrapper-in" style="display:none;" data-v-68139a52 data-v-3ba18f14><h3 class="title" style="display:none;" data-v-68139a52 data-v-68139a52></h3> <!----> <label id="box" class="inputBox" style="display:none;" data-v-68139a52 data-v-68139a52><input type="password" value="" data-v-68139a52> <span data-v-68139a52>Konck! Knock!</span> <button data-v-68139a52>OK</button></label> <div class="footer" style="display:none;" data-v-68139a52 data-v-68139a52><span data-v-68139a52><i class="iconfont reco-theme" data-v-68139a52></i> <a target="blank" href="https://vuepress-theme-reco.recoluan.com" data-v-68139a52>vuePress-theme-reco</a></span> <span data-v-68139a52><i class="iconfont reco-copyright" data-v-68139a52></i> <a data-v-68139a52><span data-v-68139a52>海星</span>
            
          <!---->
          2020
        </a></span></div></div> <div data-v-3ba18f14><main class="page"><div class="page-title" style="display:none;"><h1 class="title">什么是 BloomFilter</h1> <div data-v-5d8dbdb4><i class="iconfont reco-account" data-v-5d8dbdb4><span data-v-5d8dbdb4>海星</span></i> <!----> <!----> <!----></div></div> <div class="theme-reco-content content__default" style="display:none;"><h2 id="什么是-bloomfilter"><a href="#什么是-bloomfilter" class="header-anchor">#</a> 什么是 BloomFilter</h2> <p><strong>布隆过滤器</strong>（英语：Bloom Filter）是 1970 年由布隆提出的。它实际上是一个很长的二进制向量和一系列随机映射函数。主要用于判断一个元素是否在一个集合中。</p> <p>通常我们会遇到很多要判断一个元素是否在某个集合中的业务场景，一般想到的是将集合中所有元素保存起来，然后通过比较确定。链表、树、散列表（又叫哈希表，Hash table）等等数据结构都是这种思路。但是随着集合中元素的增加，我们需要的存储空间也会呈现线性增长，最终达到瓶颈。同时检索速度也越来越慢，上述三种结构的检索时间复杂度分别为$O(n)$，$O(logn)$，$O(1)$。</p> <p>这个时候，布隆过滤器（Bloom Filter）就应运而生。</p> <h2 id="布隆过滤器原理"><a href="#布隆过滤器原理" class="header-anchor">#</a> 布隆过滤器原理</h2> <p>了解布隆过滤器原理之前，先回顾下 Hash 函数原理。</p> <h3 id="哈希函数"><a href="#哈希函数" class="header-anchor">#</a> 哈希函数</h3> <p>哈希函数的概念是：将任意大小的输入数据转换成特定大小的输出数据的函数，转换后的数据称为哈希值或哈希编码，也叫散列值。下面是一幅示意图：</p> <p><img src="https://tva1.sinaimg.cn/large/007S8ZIlly1ge9vykn7uej31at0u01kx.jpg" alt=""></p> <p>所有散列函数都有如下基本特性：</p> <ul><li><p>如果两个散列值是不相同的（根据同一函数），那么这两个散列值的原始输入也是不相同的。这个特性是散列函数具有确定性的结果，具有这种性质的散列函数称为<strong>单向散列函数</strong>。</p></li> <li><p>散列函数的输入和输出不是唯一对应关系的，如果两个散列值相同，两个输入值很可能是相同的，但也可能不同，这种情况称为“<strong>散列碰撞</strong>（collision）”。</p></li></ul> <p>但是用 hash表存储大数据量时，空间效率还是很低，当只有一个 hash 函数时，还很容易发生哈希碰撞。</p> <h3 id="布隆过滤器数据结构"><a href="#布隆过滤器数据结构" class="header-anchor">#</a> 布隆过滤器数据结构</h3> <p>BloomFilter 是由一个固定大小的二进制向量或者位图（bitmap）和一系列映射函数组成的。</p> <p>在初始状态时，对于长度为 m 的位数组，它的所有位都被置为0，如下图所示：</p> <p><img src="https://tva1.sinaimg.cn/large/007S8ZIlly1gebubj40y3j312e0i277i.jpg" alt=""></p> <p>当有变量被加入集合时，通过 K 个映射函数将这个变量映射成位图中的 K 个点，把它们置为 1（假定有两个变量都通过 3 个映射函数）。</p> <p><img src="https://tva1.sinaimg.cn/large/007S8ZIlly1gebuc2ee3dj31o10u0drg.jpg" alt=""></p> <p>查询某个变量的时候我们只要看看这些点是不是都是 1 就可以大概率知道集合中有没有它了</p> <ul><li>如果这些点有任何一个 0，则被查询变量一定不在；</li> <li>如果都是 1，则被查询变量很<strong>可能存在</strong></li></ul> <p>为什么说是可能存在，而不是一定存在呢？那是因为映射函数本身就是散列函数，散列函数是会有碰撞的。</p> <h3 id="误判率"><a href="#误判率" class="header-anchor">#</a> 误判率</h3> <p>布隆过滤器的误判是指多个输入经过哈希之后在相同的bit位置1了，这样就无法判断究竟是哪个输入产生的，因此误判的根源在于相同的 bit 位被多次映射且置 1。</p> <p>这种情况也造成了布隆过滤器的删除问题，因为布隆过滤器的每一个 bit 并不是独占的，很有可能多个元素共享了某一位。如果我们直接删除这一位的话，会影响其他的元素。(比如上图中的第 3 位)</p> <h3 id="特性"><a href="#特性" class="header-anchor">#</a> 特性</h3> <ul><li><strong>一个元素如果判断结果为存在的时候元素不一定存在，但是判断结果为不存在的时候则一定不存在</strong>。</li> <li><strong>布隆过滤器可以添加元素，但是不能删除元素</strong>。因为删掉元素会导致误判率增加。</li></ul> <h3 id="添加与查询元素步骤"><a href="#添加与查询元素步骤" class="header-anchor">#</a> 添加与查询元素步骤</h3> <h4 id="添加元素"><a href="#添加元素" class="header-anchor">#</a> 添加元素</h4> <ol><li>将要添加的元素给 k 个哈希函数</li> <li>得到对应于位数组上的 k 个位置</li> <li>将这k个位置设为 1</li></ol> <h4 id="查询元素"><a href="#查询元素" class="header-anchor">#</a> 查询元素</h4> <ol><li>将要查询的元素给k个哈希函数</li> <li>得到对应于位数组上的k个位置</li> <li>如果k个位置有一个为 0，则肯定不在集合中</li> <li>如果k个位置全部为 1，则可能在集合中</li></ol> <h3 id="优点"><a href="#优点" class="header-anchor">#</a> 优点</h3> <p>相比于其它的数据结构，布隆过滤器在空间和时间方面都有巨大的优势。布隆过滤器存储空间和插入/查询时间都是常数 $O(K)$，另外，散列函数相互之间没有关系，方便由硬件并行实现。布隆过滤器不需要存储元素本身，在某些对保密要求非常严格的场合有优势。</p> <p>布隆过滤器可以表示全集，其它任何数据结构都不能；</p> <h3 id="缺点"><a href="#缺点" class="header-anchor">#</a> 缺点</h3> <p>但是布隆过滤器的缺点和优点一样明显。误算率是其中之一。随着存入的元素数量增加，误算率随之增加。但是如果元素数量太少，则使用散列表足矣。</p> <p>另外，一般情况下不能从布隆过滤器中删除元素。我们很容易想到把位数组变成整数数组，每插入一个元素相应的计数器加 1, 这样删除元素时将计数器减掉就可以了。然而要保证安全地删除元素并非如此简单。首先我们必须保证删除的元素的确在布隆过滤器里面。这一点单凭这个过滤器是无法保证的。另外计数器回绕也会造成问题。</p> <p>在降低误算率方面，有不少工作，使得出现了很多布隆过滤器的变种。</p> <h2 id="布隆过滤器使用场景和实例"><a href="#布隆过滤器使用场景和实例" class="header-anchor">#</a> 布隆过滤器使用场景和实例</h2> <p>在程序的世界中，布隆过滤器是程序员的一把利器，利用它可以快速地解决项目中一些比较棘手的问题。</p> <p>如网页 URL 去重、垃圾邮件识别、大集合中重复元素的判断和缓存穿透等问题。</p> <p>布隆过滤器的典型应用有：</p> <ul><li><p>数据库防止穿库。 Google Bigtable，HBase 和 Cassandra 以及 Postgresql 使用BloomFilter来减少不存在的行或列的磁盘查找。避免代价高昂的磁盘查找会大大提高数据库查询操作的性能。</p></li> <li><p>业务场景中判断用户是否阅读过某视频或文章，比如抖音或头条，当然会导致一定的误判，但不会让用户看到重复的内容。</p></li> <li><p>缓存宕机、缓存击穿场景，一般判断用户是否在缓存中，如果在则直接返回结果，不在则查询db，如果来一波冷数据，会导致缓存大量击穿，造成雪崩效应，这时候可以用布隆过滤器当缓存的索引，只有在布隆过滤器中，才去查询缓存，如果没查询到，则穿透到db。如果不在布隆器中，则直接返回。</p></li> <li><p>WEB拦截器，如果相同请求则拦截，防止重复被攻击。用户第一次请求，将请求参数放入布隆过滤器中，当第二次请求时，先判断请求参数是否被布隆过滤器命中。可以提高缓存命中率。Squid 网页代理缓存服务器在 cache digests 中就使用了布隆过滤器。Google Chrome浏览器使用了布隆过滤器加速安全浏览服务</p></li> <li><p>Venti 文档存储系统也采用布隆过滤器来检测先前存储的数据。</p></li> <li><p>SPIN 模型检测器也使用布隆过滤器在大规模验证问题时跟踪可达状态空间。</p></li></ul> <h2 id="coding"><a href="#coding" class="header-anchor">#</a> Coding~</h2> <p>知道了布隆过滤去的原理和使用场景，我们可以自己实现一个简单的布隆过滤器</p> <h3 id="自定义的-bloomfilter"><a href="#自定义的-bloomfilter" class="header-anchor">#</a> 自定义的 BloomFilter</h3> <div class="language-java extra-class"><pre class="language-java"><code><span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">MyBloomFilter</span> <span class="token punctuation">{</span>

    <span class="token comment">/**
     * 一个长度为10 亿的比特位
     */</span>
    <span class="token keyword">private</span> <span class="token keyword">static</span> <span class="token keyword">final</span> <span class="token keyword">int</span> DEFAULT_SIZE <span class="token operator">=</span> <span class="token number">256</span> <span class="token operator">&lt;&lt;</span> <span class="token number">22</span><span class="token punctuation">;</span>

    <span class="token comment">/**
     * 为了降低错误率，使用加法hash算法，所以定义一个8个元素的质数数组
     */</span>
    <span class="token keyword">private</span> <span class="token keyword">static</span> <span class="token keyword">final</span> <span class="token keyword">int</span><span class="token punctuation">[</span><span class="token punctuation">]</span> seeds <span class="token operator">=</span> <span class="token punctuation">{</span><span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">,</span> <span class="token number">7</span><span class="token punctuation">,</span> <span class="token number">11</span><span class="token punctuation">,</span> <span class="token number">13</span><span class="token punctuation">,</span> <span class="token number">31</span><span class="token punctuation">,</span> <span class="token number">37</span><span class="token punctuation">,</span> <span class="token number">61</span><span class="token punctuation">}</span><span class="token punctuation">;</span>

    <span class="token comment">/**
     * 相当于构建 8 个不同的hash算法
     */</span>
    <span class="token keyword">private</span> <span class="token keyword">static</span> <span class="token class-name">HashFunction</span><span class="token punctuation">[</span><span class="token punctuation">]</span> functions <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">HashFunction</span><span class="token punctuation">[</span>seeds<span class="token punctuation">.</span>length<span class="token punctuation">]</span><span class="token punctuation">;</span>

    <span class="token comment">/**
     * 初始化布隆过滤器的 bitmap
     */</span>
    <span class="token keyword">private</span> <span class="token keyword">static</span> <span class="token class-name">BitSet</span> bitset <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">BitSet</span><span class="token punctuation">(</span>DEFAULT_SIZE<span class="token punctuation">)</span><span class="token punctuation">;</span>

    <span class="token comment">/**
     * 添加数据
     *
     * @param value 需要加入的值
     */</span>
    <span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">void</span> <span class="token function">add</span><span class="token punctuation">(</span><span class="token class-name">String</span> value<span class="token punctuation">)</span> <span class="token punctuation">{</span>
        <span class="token keyword">if</span> <span class="token punctuation">(</span>value <span class="token operator">!=</span> <span class="token keyword">null</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
            <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token class-name">HashFunction</span> f <span class="token operator">:</span> functions<span class="token punctuation">)</span> <span class="token punctuation">{</span>
                <span class="token comment">//计算 hash 值并修改 bitmap 中相应位置为 true</span>
                bitset<span class="token punctuation">.</span><span class="token function">set</span><span class="token punctuation">(</span>f<span class="token punctuation">.</span><span class="token function">hash</span><span class="token punctuation">(</span>value<span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token boolean">true</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
            <span class="token punctuation">}</span>
        <span class="token punctuation">}</span>
    <span class="token punctuation">}</span>

    <span class="token comment">/**
     * 判断相应元素是否存在
     * @param value 需要判断的元素
     * @return 结果
     */</span>
    <span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">boolean</span> <span class="token function">contains</span><span class="token punctuation">(</span><span class="token class-name">String</span> value<span class="token punctuation">)</span> <span class="token punctuation">{</span>
        <span class="token keyword">if</span> <span class="token punctuation">(</span>value <span class="token operator">==</span> <span class="token keyword">null</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
            <span class="token keyword">return</span> <span class="token boolean">false</span><span class="token punctuation">;</span>
        <span class="token punctuation">}</span>
        <span class="token keyword">boolean</span> ret <span class="token operator">=</span> <span class="token boolean">true</span><span class="token punctuation">;</span>
        <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token class-name">HashFunction</span> f <span class="token operator">:</span> functions<span class="token punctuation">)</span> <span class="token punctuation">{</span>
            ret <span class="token operator">=</span> bitset<span class="token punctuation">.</span><span class="token function">get</span><span class="token punctuation">(</span>f<span class="token punctuation">.</span><span class="token function">hash</span><span class="token punctuation">(</span>value<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
            <span class="token comment">//一个 hash 函数返回 false 则跳出循环</span>
            <span class="token keyword">if</span> <span class="token punctuation">(</span><span class="token operator">!</span>ret<span class="token punctuation">)</span> <span class="token punctuation">{</span>
                <span class="token keyword">break</span><span class="token punctuation">;</span>
            <span class="token punctuation">}</span>
        <span class="token punctuation">}</span>
        <span class="token keyword">return</span> ret<span class="token punctuation">;</span>
    <span class="token punctuation">}</span>

    <span class="token comment">/**
     * 模拟用户是不是会员，或用户在不在线。。。
     */</span>
    <span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">void</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token class-name">String</span><span class="token punctuation">[</span><span class="token punctuation">]</span> args<span class="token punctuation">)</span> <span class="token punctuation">{</span>

        <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> seeds<span class="token punctuation">.</span>length<span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
            functions<span class="token punctuation">[</span>i<span class="token punctuation">]</span> <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">HashFunction</span><span class="token punctuation">(</span>DEFAULT_SIZE<span class="token punctuation">,</span> seeds<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token punctuation">}</span>

        <span class="token comment">// 添加1亿数据</span>
        <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> <span class="token number">100000000</span><span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
            <span class="token function">add</span><span class="token punctuation">(</span><span class="token class-name">String</span><span class="token punctuation">.</span><span class="token function">valueOf</span><span class="token punctuation">(</span>i<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token punctuation">}</span>
        <span class="token class-name">String</span> id <span class="token operator">=</span> <span class="token string">&quot;123456789&quot;</span><span class="token punctuation">;</span>
        <span class="token function">add</span><span class="token punctuation">(</span>id<span class="token punctuation">)</span><span class="token punctuation">;</span>

        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span><span class="token function">contains</span><span class="token punctuation">(</span>id<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>   <span class="token comment">// true</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span><span class="token string">&quot;&quot;</span> <span class="token operator">+</span> <span class="token function">contains</span><span class="token punctuation">(</span><span class="token string">&quot;234567890&quot;</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">//false</span>
    <span class="token punctuation">}</span>
<span class="token punctuation">}</span>

<span class="token keyword">class</span> <span class="token class-name">HashFunction</span> <span class="token punctuation">{</span>

    <span class="token keyword">private</span> <span class="token keyword">int</span> size<span class="token punctuation">;</span>
    <span class="token keyword">private</span> <span class="token keyword">int</span> seed<span class="token punctuation">;</span>

    <span class="token keyword">public</span> <span class="token class-name">HashFunction</span><span class="token punctuation">(</span><span class="token keyword">int</span> size<span class="token punctuation">,</span> <span class="token keyword">int</span> seed<span class="token punctuation">)</span> <span class="token punctuation">{</span>
        <span class="token keyword">this</span><span class="token punctuation">.</span>size <span class="token operator">=</span> size<span class="token punctuation">;</span>
        <span class="token keyword">this</span><span class="token punctuation">.</span>seed <span class="token operator">=</span> seed<span class="token punctuation">;</span>
    <span class="token punctuation">}</span>

    <span class="token keyword">public</span> <span class="token keyword">int</span> <span class="token function">hash</span><span class="token punctuation">(</span><span class="token class-name">String</span> value<span class="token punctuation">)</span> <span class="token punctuation">{</span>
        <span class="token keyword">int</span> result <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span>
        <span class="token keyword">int</span> len <span class="token operator">=</span> value<span class="token punctuation">.</span><span class="token function">length</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> len<span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
            result <span class="token operator">=</span> seed <span class="token operator">*</span> result <span class="token operator">+</span> value<span class="token punctuation">.</span><span class="token function">charAt</span><span class="token punctuation">(</span>i<span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token punctuation">}</span>
        <span class="token keyword">int</span> r <span class="token operator">=</span> <span class="token punctuation">(</span>size <span class="token operator">-</span> <span class="token number">1</span><span class="token punctuation">)</span> <span class="token operator">&amp;</span> result<span class="token punctuation">;</span>
        <span class="token keyword">return</span> <span class="token punctuation">(</span>size <span class="token operator">-</span> <span class="token number">1</span><span class="token punctuation">)</span> <span class="token operator">&amp;</span> result<span class="token punctuation">;</span>
    <span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</code></pre></div><blockquote><p>What？我们写的这些早有大牛帮我们实现，还造轮子，真是浪费时间，No，No，No，我们学习过程中是可以造轮子的，造轮子本身就是我们自己对设计和实现的具体落地过程，不仅能提高我们的编程能力，在造轮子的过程中肯定会遇到很多我们没有思考过的问题，成长看的见~~</p></blockquote> <blockquote><p>实际项目使用的时候，领导和我说项目一定要稳定运行，没自信的我放弃了自己的轮子。</p></blockquote> <h3 id="guava-中的-bloomfilter"><a href="#guava-中的-bloomfilter" class="header-anchor">#</a> Guava 中的 BloomFilter</h3> <div class="language-xml extra-class"><pre class="language-xml"><code><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>dependency</span><span class="token punctuation">&gt;</span></span>
    <span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>groupId</span><span class="token punctuation">&gt;</span></span>com.google.guava<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>groupId</span><span class="token punctuation">&gt;</span></span>
    <span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>artifactId</span><span class="token punctuation">&gt;</span></span>guava<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>artifactId</span><span class="token punctuation">&gt;</span></span>
    <span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>version</span><span class="token punctuation">&gt;</span></span>23.0<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>version</span><span class="token punctuation">&gt;</span></span>
<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>dependency</span><span class="token punctuation">&gt;</span></span>
</code></pre></div><div class="language-java extra-class"><pre class="language-java"><code><span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">GuavaBloomFilterDemo</span> <span class="token punctuation">{</span>

    <span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">void</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token class-name">String</span><span class="token punctuation">[</span><span class="token punctuation">]</span> args<span class="token punctuation">)</span> <span class="token punctuation">{</span>
        <span class="token comment">//后边两个参数：预计包含的数据量，和允许的误差值</span>
        <span class="token class-name">BloomFilter</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token class-name">Integer</span><span class="token punctuation">&gt;</span></span> bloomFilter <span class="token operator">=</span> <span class="token class-name">BloomFilter</span><span class="token punctuation">.</span><span class="token function">create</span><span class="token punctuation">(</span><span class="token class-name">Funnels</span><span class="token punctuation">.</span><span class="token function">integerFunnel</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">100000</span><span class="token punctuation">,</span> <span class="token number">0.01</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> <span class="token number">100000</span><span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
            bloomFilter<span class="token punctuation">.</span><span class="token function">put</span><span class="token punctuation">(</span>i<span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token punctuation">}</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>bloomFilter<span class="token punctuation">.</span><span class="token function">mightContain</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>bloomFilter<span class="token punctuation">.</span><span class="token function">mightContain</span><span class="token punctuation">(</span><span class="token number">2</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>bloomFilter<span class="token punctuation">.</span><span class="token function">mightContain</span><span class="token punctuation">(</span><span class="token number">3</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>bloomFilter<span class="token punctuation">.</span><span class="token function">mightContain</span><span class="token punctuation">(</span><span class="token number">100001</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>

        <span class="token comment">//bloomFilter.writeTo();</span>
    <span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</code></pre></div><p>分布式环境中，布隆过滤器肯定还需要考虑是可以共享的资源，这时候我们会想到 Redis，是的，Redis 也实现了布隆过滤器。</p> <p><strong>当然我们也可以把布隆过滤器通过 <code>bloomFilter.writeTo()</code> 写入一个文件，放入OSS、S3这类对象存储中。</strong></p> <h3 id="redis-中的-bloomfilter"><a href="#redis-中的-bloomfilter" class="header-anchor">#</a> Redis 中的 BloomFilter</h3> <p>Redis 提供的 bitMap 可以实现布隆过滤器，但是需要自己设计映射函数和一些细节，这和我们自定义没啥区别。</p> <p>Redis 官方提供的布隆过滤器到了 Redis 4.0 提供了插件功能之后才正式登场。布隆过滤器作为一个插件加载到 Redis Server 中，给 Redis 提供了强大的布隆去重功能。</p> <p>在已安装 Redis 的前提下，安装 RedisBloom，有两种方式</p> <p><strong>直接编译进行安装</strong></p> <div class="language-shell extra-class"><pre class="language-shell"><code><span class="token function">git</span> clone https://github.com/RedisBloom/RedisBloom.git
<span class="token builtin class-name">cd</span> RedisBloom
<span class="token function">make</span>     <span class="token comment">#编译 会生成一个rebloom.so文件</span>
redis-server --loadmodule /path/to/rebloom.so   <span class="token comment">#运行redis时加载布隆过滤器模块</span>
redis-cli    <span class="token comment"># 启动连接容器中的 redis 客户端验证</span>
</code></pre></div><p><strong>使用Docker进行安装</strong></p> <div class="language-shell extra-class"><pre class="language-shell"><code>docker pull redislabs/rebloom:latest <span class="token comment"># 拉取镜像</span>
docker run -p <span class="token number">6379</span>:6379 --name redis-redisbloom redislabs/rebloom:latest <span class="token comment">#运行容器</span>
docker <span class="token builtin class-name">exec</span> -it redis-redisbloom <span class="token function">bash</span>
redis-cli     
</code></pre></div><p><strong>使用</strong></p> <p>布隆过滤器基本指令：</p> <ul><li>bf.add 添加元素到布隆过滤器</li> <li>bf.exists 判断元素是否在布隆过滤器</li> <li>bf.madd 添加多个元素到布隆过滤器，bf.add 只能添加一个</li> <li>bf.mexists 判断多个元素是否在布隆过滤器</li></ul> <div class="language- extra-class"><pre class="language-text"><code>127.0.0.1:6379&gt; bf.add user Tom
(integer) 1
127.0.0.1:6379&gt; bf.add user John
(integer) 1
127.0.0.1:6379&gt; bf.exists user Tom
(integer) 1
127.0.0.1:6379&gt; bf.exists user Linda
(integer) 0
127.0.0.1:6379&gt; bf.madd user Barry Jerry Mars
1) (integer) 1
2) (integer) 1
3) (integer) 1
127.0.0.1:6379&gt; bf.mexists user Barry Linda
1) (integer) 1
2) (integer) 0
</code></pre></div><p>我们只有这几个参数，肯定不会有误判，当元素逐渐增多时，就会有一定的误判了，这里就不做这个实验了。</p> <p>上面使用的布隆过滤器只是默认参数的布隆过滤器，它在我们第一次 add 的时候自动创建。</p> <p>Redis 还提供了自定义参数的布隆过滤器，<code>bf.reserve 过滤器名 error_rate initial_size</code></p> <ul><li>error_rate：允许布隆过滤器的错误率，这个值越低过滤器的位数组的大小越大，占用空间也就越大</li> <li>initial_size：布隆过滤器可以储存的元素个数，当实际存储的元素个数超过这个值之后，过滤器的准确率会下降</li></ul> <p>但是这个操作需要在 add 之前显式创建。如果对应的 key 已经存在，bf.reserve 会报错</p> <div class="language- extra-class"><pre class="language-text"><code>127.0.0.1:6379&gt; bf.reserve user 0.01 100
(error) ERR item exists
127.0.0.1:6379&gt; bf.reserve topic 0.01 1000
OK
</code></pre></div><p>我是一名 Javaer，肯定还要用 Java 来实现的，Java 的 Redis 客户端比较多，有些还没有提供指令扩展机制，笔者已知的 Redisson 和 lettuce 是可以使用布隆过滤器的，我们这里用 <a href="https://github.com/redisson/redisson/wiki/6.-%E5%88%86%E5%B8%83%E5%BC%8F%E5%AF%B9%E8%B1%A1#68-%E5%B8%83%E9%9A%86%E8%BF%87%E6%BB%A4%E5%99%A8bloom-filter" target="_blank" rel="noopener noreferrer">Redisson<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a></p> <div class="language-java extra-class"><pre class="language-java"><code><span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">RedissonBloomFilterDemo</span> <span class="token punctuation">{</span>

    <span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">void</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token class-name">String</span><span class="token punctuation">[</span><span class="token punctuation">]</span> args<span class="token punctuation">)</span> <span class="token punctuation">{</span>

        <span class="token class-name">Config</span> config <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">Config</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        config<span class="token punctuation">.</span><span class="token function">useSingleServer</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">setAddress</span><span class="token punctuation">(</span><span class="token string">&quot;redis://127.0.0.1:6379&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token class-name">RedissonClient</span> redisson <span class="token operator">=</span> <span class="token class-name">Redisson</span><span class="token punctuation">.</span><span class="token function">create</span><span class="token punctuation">(</span>config<span class="token punctuation">)</span><span class="token punctuation">;</span>

        <span class="token class-name">RBloomFilter</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token class-name">String</span><span class="token punctuation">&gt;</span></span> bloomFilter <span class="token operator">=</span> redisson<span class="token punctuation">.</span><span class="token function">getBloomFilter</span><span class="token punctuation">(</span><span class="token string">&quot;user&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token comment">// 初始化布隆过滤器，预计统计元素数量为55000000，期望误差率为0.03</span>
        bloomFilter<span class="token punctuation">.</span><span class="token function">tryInit</span><span class="token punctuation">(</span><span class="token number">55000000L</span><span class="token punctuation">,</span> <span class="token number">0.03</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        bloomFilter<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span><span class="token string">&quot;Tom&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        bloomFilter<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span><span class="token string">&quot;Jack&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>bloomFilter<span class="token punctuation">.</span><span class="token function">count</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>   <span class="token comment">//2</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>bloomFilter<span class="token punctuation">.</span><span class="token function">contains</span><span class="token punctuation">(</span><span class="token string">&quot;Tom&quot;</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">//true</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>bloomFilter<span class="token punctuation">.</span><span class="token function">contains</span><span class="token punctuation">(</span><span class="token string">&quot;Linda&quot;</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">//false</span>
    <span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</code></pre></div><h2 id="扩展"><a href="#扩展" class="header-anchor">#</a> 扩展</h2> <p>为了解决布隆过滤器不能删除元素的问题，布谷鸟过滤器横空出世。论文《Cuckoo Filter：Better Than Bloom》作者将布谷鸟过滤器和布隆过滤器进行了深入的对比。相比布谷鸟过滤器而言布隆过滤器有以下不足：查询性能弱、空间利用效率低、不支持反向操作（删除）以及不支持计数。</p> <p>由于使用较少，暂不深入。</p> <blockquote><p>文章持续更新，可以微信搜「 <strong>JavaKeeper</strong> 」第一时间阅读，无套路领取 500+ 本电子书和 30+ 视频教学和源码，本文 <strong>GitHub</strong> <a href="https://github.com/Jstarfish/JavaKeeper" target="_blank" rel="noopener noreferrer">github.com/JavaKeeper<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a> 已经收录，Javaer 开发、面试必备技能兵器谱，有你想要的。</p></blockquote> <h2 id="参考与感谢"><a href="#参考与感谢" class="header-anchor">#</a> 参考与感谢</h2> <p>https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf</p> <p>http://www.justdojava.com/2019/10/22/bloomfilter/</p> <p>https://www.cnblogs.com/cpselvis/p/6265825.html</p> <p>https://juejin.im/post/5cc5aa7ce51d456e431adac5</p></div> <footer class="page-edit" style="display:none;"><!----> <!----></footer> <!----> <!----> <!----></main> <!----></div></div></div></div><div class="global-ui"><div class="back-to-ceiling" style="right:1rem;bottom:6rem;width:2.5rem;height:2.5rem;border-radius:.25rem;line-height:2.5rem;display:none;" data-v-db14854a data-v-db14854a><svg t="1574745035067" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5404" class="icon" data-v-db14854a><path d="M526.60727968 10.90185116a27.675 27.675 0 0 0-29.21455937 0c-131.36607665 82.28402758-218.69155461 228.01873535-218.69155402 394.07834331a462.20625001 462.20625001 0 0 0 5.36959153 69.94390903c1.00431239 6.55289093-0.34802892 13.13561351-3.76865779 18.80351572-32.63518765 54.11355614-51.75690182 118.55860487-51.7569018 187.94566865a371.06718723 371.06718723 0 0 0 11.50484808 91.98906777c6.53300375 25.50556257 41.68394495 28.14064038 52.69160883 4.22606766 17.37162448-37.73630017 42.14135425-72.50938081 72.80769204-103.21549295 2.18761121 3.04276886 4.15646224 6.24463696 6.40373557 9.22774369a1871.4375 1871.4375 0 0 0 140.04691725 5.34970492 1866.36093723 1866.36093723 0 0 0 140.04691723-5.34970492c2.24727335-2.98310674 4.21612437-6.18497483 6.3937923-9.2178004 30.66633723 30.70611158 55.4360664 65.4791928 72.80769147 103.21549355 11.00766384 23.91457269 46.15860503 21.27949489 52.69160879-4.22606768a371.15156223 371.15156223 0 0 0 11.514792-91.99901164c0-69.36717486-19.13165746-133.82216804-51.75690182-187.92578088-3.42062944-5.66790279-4.76302748-12.26056868-3.76865837-18.80351632a462.20625001 462.20625001 0 0 0 5.36959269-69.943909c-0.00994388-166.08943902-87.32547796-311.81420293-218.6915546-394.09823051zM605.93803103 357.87693858a93.93749974 93.93749974 0 1 1-187.89594924 6.1e-7 93.93749974 93.93749974 0 0 1 187.89594924-6.1e-7z" p-id="5405" data-v-db14854a></path><path d="M429.50777625 765.63860547C429.50777625 803.39355007 466.44236686 1000.39046097 512.00932183 1000.39046097c45.56695499 0 82.4922232-197.00623328 82.5015456-234.7518555 0-37.75494459-36.9345906-68.35043303-82.4922232-68.34111062-45.57627738-0.00932239-82.52019037 30.59548842-82.51086798 68.34111062z" p-id="5406" data-v-db14854a></path></svg></div><!----></div></div>
    <script src="/assets/js/app.447d4224.js" defer></script><script src="/assets/js/3.9d76740c.js" defer></script><script src="/assets/js/1.c4fd7d2e.js" defer></script><script src="/assets/js/33.0c53373c.js" defer></script>
  </body>
</html>
